<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-CN">
<head>
  <meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />















  
  
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />




  
  
  
  

  
    
    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
    
    <link href="//fonts.googleapis.com/css?family=Microsoft YaHei:300,300italic,400,400italic,700,700italic|Microsoft YaHei:300,300italic,400,400italic,700,700italic|Microsoft YaHei:300,300italic,400,400italic,700,700italic|Microsoft YaHei:300,300italic,400,400italic,700,700italic|Inziu Iosevka Slab SC:300,300italic,400,400italic,700,700italic&subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />

<link href="/css/main.css?v=5.1.2" rel="stylesheet" type="text/css" />


  <meta name="keywords" content="decision trees," />








  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico?v=5.1.2" />






<meta name="description" content="Splitting datasets one  feature at a time: decision trees.The kNN algorithm do a great job of classifying, but it didn ’ t lead to any major insights about the data. One of the best things about decis">
<meta name="keywords" content="decision trees">
<meta property="og:type" content="article">
<meta property="og:title" content="Decision Trees">
<meta property="og:url" content="http://idmk.oschina.io/2017/09/01/decision-trees/index.html">
<meta property="og:site_name" content="苦舟">
<meta property="og:description" content="Splitting datasets one  feature at a time: decision trees.The kNN algorithm do a great job of classifying, but it didn ’ t lead to any major insights about the data. One of the best things about decis">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901022111744.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901022226432.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901022432269.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-2017090102282530.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901023222813.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-2017090102343748.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901023753658.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901023849341.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901024205760.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901024238448.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-2017090122575321.png">
<meta property="og:updated_time" content="2017-11-22T15:33:53.677Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Decision Trees">
<meta name="twitter:description" content="Splitting datasets one  feature at a time: decision trees.The kNN algorithm do a great job of classifying, but it didn ’ t lead to any major insights about the data. One of the best things about decis">
<meta name="twitter:image" content="http://idmk.oschina.io/2017/09/01/decision-trees/markdown-img-paste-20170901022111744.png">



<script type="text/javascript" id="hexo.configurations">
  var NexT = window.NexT || {};
  var CONFIG = {
    root: '/',
    scheme: 'Gemini',
    sidebar: {"position":"left","display":"hide","offset":12,"offset_float":12,"b2t":false,"scrollpercent":false,"onmobile":false},
    fancybox: true,
    tabs: true,
    motion: true,
    duoshuo: {
      userId: '0',
      author: '博主'
    },
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {"per_page":10},
      labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
    }
  };
</script>



  <link rel="canonical" href="http://idmk.oschina.io/2017/09/01/decision-trees/"/>





  <title>Decision Trees | 苦舟</title>
  














</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail ">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/"  class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">苦舟</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle">学海无涯，吾将上下求索。</p>
      
  </div>

  <div class="site-nav-toggle">
    <button>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/categories/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br />
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/about/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br />
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            归档
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-commonweal">
          <a href="/404.html" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-heartbeat"></i> <br />
            
            公益404
          </a>
        </li>
      

      
        <li class="menu-item menu-item-search">
          
            <a href="javascript:;" class="popup-trigger">
          
            
              <i class="menu-item-icon fa fa-search fa-fw"></i> <br />
            
            搜索
          </a>
        </li>
      
    </ul>
  

  
    <div class="site-search">
      
  <div class="popup search-popup local-search-popup">
  <div class="local-search-header clearfix">
    <span class="search-icon">
      <i class="fa fa-search"></i>
    </span>
    <span class="popup-btn-close">
      <i class="fa fa-times-circle"></i>
    </span>
    <div class="local-search-input-wrapper">
      <input autocomplete="off"
             placeholder="搜索..." spellcheck="false"
             type="text" id="local-search-input">
    </div>
  </div>
  <div id="local-search-result"></div>
</div>



    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://idmk.oschina.io/2017/09/01/decision-trees/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="东木金">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/uploads/avatar.jpg">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="苦舟">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">Decision Trees</h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2017-09-01T01:45:46+08:00">
                2017-09-01
              </time>
            

            

            
          </span>

          
            <span class="post-category" >
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/ML/" itemprop="url" rel="index">
                    <span itemprop="name">ML</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <p>Splitting datasets one  feature at a time: decision trees.<br>The kNN algorithm do a great job of classifying, but it didn ’ t lead to any major insights about the data. One of the best things about decision trees is that humans can easily understand the data.</p>
<a id="more"></a>
<p>It has decision blocks (rectangles) and terminating blocks (ovals) where some conclusion has been reached. The right and left arrows coming out of the decision blocks are known as branches, and they can lead to other decision blocks or to a terminating block.</p>
<p> The decision tree does a great job of distilling data into knowledge. With this, you can take a set of unfamiliar data and extract a set of rules. The machine learning will take place as the machine creates these rules from the dataset.</p>
<h2 id="Tree-Construction"><a href="#Tree-Construction" class="headerlink" title="Tree Construction"></a>Tree Construction</h2><p> To build a decision tree, you need to make a first decision on the dataset to dictatewhich feature is used to split the data. To determine this, you try every feature and measure which split will give you the best results.<br> After that, you ’ ll split the dataset into subsets. The subsets will then traverse down the branches of the first decision node.<br> If the data on the branches is the same class, then you ’ ve properly classified it and don ’ t need to continue splitting it.<br> If the data isn ’ t the same, then you need to repeat the splitting process on this subset. The decision on how to split this subset is done the same way as the original dataset, and you repeat this process until you ’ ve classified all the data.</p>
<p>Pseudo-code for a function called <code>create_branch()</code> would look like this:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div></pre></td><td class="code"><pre><div class="line">Check if every item in the dataset is in the same class:</div><div class="line">    If so return the class label</div><div class="line">    Else</div><div class="line">        find the best feature to split the data</div><div class="line">        split the dataset</div><div class="line">        create a branch node</div><div class="line">        for each split</div><div class="line">            call `create_branch()` and add the result to the branch node</div><div class="line">        return branch node</div></pre></td></tr></table></figure></p>
<p>We ’ ll write this in Python later, but first, we need to address how to split the dataset.</p>
<h2 id="How-To-Split-The-Dataset-–-Feature-Selection"><a href="#How-To-Split-The-Dataset-–-Feature-Selection" class="headerlink" title="How To Split The Dataset – Feature Selection"></a>How To Split The Dataset – Feature Selection</h2><p><a href="http://en.wikipedia.org/wiki/ID3_algorithm" target="_blank" rel="external">ID3 algorithm</a><br><a href="https://www.wikiwand.com/en/C4.5_algorithm" target="_blank" rel="external">C4.5 algorithm</a><br><a href="https://www.wikiwand.com/en/Predictive_analytics#/Classification_and_regression_trees_.28CART.29" target="_blank" rel="external">CART</a></p>
<h3 id="Information-Theory"><a href="#Information-Theory" class="headerlink" title="Information Theory"></a>Information Theory</h3><ul>
<li>Information Gain<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901022111744.png" alt="markdown-img-paste-20170901022111744.png" title="">
注：这个根据目标类别分类得出的信息熵，在样本给出的情况下就已经知晓，根据概率统计，也称经验熵。<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901022226432.png" alt="markdown-img-paste-20170901022226432.png" title="">
注：这个实际上是经验条件熵，因为确认是在 A 属性划分出子集的前提下再按照目标类别分类得出的熵的期望。<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901022432269.png" alt="markdown-img-paste-20170901022432269.png" title="">
ID3 算法就是基于信息增益来衡量属性（即特征）划分数据的能力，进而为特征（即属性）选择提供原则。<br>但是：信息增益选择方法有一个很大的缺陷，它总是会倾向于选择属性值多的属性。</li>
<li>Gain Ratio<br>增益比克服了信息增益倾向于选择属性值多的属性的缺陷<img src="/2017/09/01/decision-trees/markdown-img-paste-2017090102282530.png" alt="markdown-img-paste-2017090102282530.png" title="">
注：分裂信息即按照某个属性划分的信息熵。增益比率定义为信息增益与分裂信息的比率：<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901023222813.png" alt="markdown-img-paste-20170901023222813.png" title="">
如果一个属性的取值很多，那么 SplitInfoR(D) 会大，从而使 GainRatio(R) 变小。这样就能克服了 Information Gain 的缺陷。<br>不过增益比率也有缺点，SplitInfo(D) 可能取 0，此时没有计算意义；且当 SplitInfo(D) 趋向于 0 时，GainRatio(R) 的值变得不可信，改进的措施就是在分母加一个平滑，这里加一个所有分裂信息的平均值：<img src="/2017/09/01/decision-trees/markdown-img-paste-2017090102343748.png" alt="markdown-img-paste-2017090102343748.png" title=""></li>
<li>Gini Coefficient<br>基尼指数的定义：在分类问题中，假设有 k 个类，样本点属于第 k 类的概率为 $p\left( k \right)$，则概率分布的基尼指数定义为：<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901023753658.png" alt="markdown-img-paste-20170901023753658.png" title="">
对于给定的样本集合 D 的基尼指数为：<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901023849341.png" alt="markdown-img-paste-20170901023849341.png" title="">
这里，$C_{k}$ 是 D 中属于第 k 类的样本子集，k 是类的个数。<br>如果样本集合 D 根据特征 A 是否取某一可能值α被分割成 D1 和 D2 两部分，即<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901024205760.png" alt="markdown-img-paste-20170901024205760.png" title="">
则在特征 A 的条件下，集合 D 的基尼指数定义为<img src="/2017/09/01/decision-trees/markdown-img-paste-20170901024238448.png" alt="markdown-img-paste-20170901024238448.png" title="">
基尼指数 Gini(D) 表示集合 D 的不确定性，基尼指数 Gini(D, A) 表示经 A= α分割后集合 D 的不确定性。基尼指数值越大，样本集合的不确定性也就越大，这一点与熵相似。</li>
</ul>
<h3 id="特征选择算法"><a href="#特征选择算法" class="headerlink" title="特征选择算法"></a>特征选择算法</h3><p>ID3 算法基于信息增益来衡量属性（即特征）划分数据的能力；C4.5 算法基于信息增益率来衡量属性划分数据的能力；CART 生成算法基于基尼指数来衡量属性划分数据的能力。</p>
<p>我们主要使用 CART 生成算法，下面介绍 CART 生成算法。</p>
<h2 id="CART-生成算法"><a href="#CART-生成算法" class="headerlink" title="CART 生成算法"></a>CART 生成算法</h2><p>二叉树（Binary Tree）是 CART 模型的代表之一。这里所说的二叉树，与数据结构和算法里面所说的二叉树别无二致。<br>每个节点代表在节点处有一个输入变量被传入，并根据某些变量被分类（我们假定该变量是数值型的）。树的叶节点（又叫做终端节点，Terminal Node）由输出变量构成，它被用于进行预测。<br>在树被创建完成之后，每个新的数据样本都将按照每个节点的分割条件，沿着该树从顶部往下，直到输出一个最终决策。</p>
<p>创建一个二元分类树实际上是一个分割输入空间的过程。递归二元分类（Recursive Binary Splitting）是一个被用于分割空间的贪心算法。这实际上是一个数值过程：当一系列的输入值被排列好后，它将尝试一系列的分割点，测试它们分类完后的基尼指数。<br>基尼指数主要在 CART 算法中用到，随机森林中用到的属性划分标准也是它。Gini index 划分是二元的，它度量的是数据分区或训练元组集 D 的不纯度，表示的是一个随机选中的样本在子集中被分错的可能性。计算方式如下：<br>$$Gini(D)=1-\sum p^{2}_i ，其中，p_i 是 D 中元组为 C_i 类的概率。$$<br>Gini 指数越大，不纯度越大，越不容易区分。假设 A 有 v 个不同的值出现在特征 D 中，它的二元划分有 2v − 22v − 2 种（除去自己和空集）。当考虑二元划分裂时，计算每个结果分区的不纯度加权和。比如 A 有两个值，则特征 D 被划分成 D1 和 D2, 这时 Gini 指数为：<br><img src="/2017/09/01/decision-trees/markdown-img-paste-2017090122575321.png" alt="markdown-img-paste-2017090122575321.png" title=""><br>上面的式子表示的是不确定性的大小。对于每个属性，考虑每种可能的二元划分，对于离散值属性，选择该属性产生最小 Gini 指数的自己作为它的分裂信息。</p>
<h3 id="CART-是怎样用于分类的"><a href="#CART-是怎样用于分类的" class="headerlink" title="CART 是怎样用于分类的"></a>CART 是怎样用于分类的</h3><p>分类回归树是一棵二叉树，且每个非叶子节点都有两个孩子<br>CART 与 ID3 区别： CART 中用于选择变量的不纯性度量是 Gini 指数； 如果目标变量是标称的，并且是具有两个以上的类别，则 CART 可能考虑将目标类别合并成两个超类别（双化）； 如果目标变量是连续的，则 CART 算法找出一组基于树的回归方程来预测目标变量。</p>
<p>GINI 指数： 1、是一种不等性度量； 2、通常用来度量收入不平衡，可以用来度量任何不均匀分布； 3、是介于 0~1 之间的数，0- 完全相等，1- 完全不相等； 4、总体内包含的类别越杂乱，GINI 指数就越大（跟熵的概念很相似）</p>
<h3 id="剪枝"><a href="#剪枝" class="headerlink" title="剪枝"></a>剪枝</h3><p>剪枝过程特别重要，所以在最优决策树生成过程中占有重要地位。有研究表明，剪枝过程的重要性要比树生成过程更为重要，对于不同的划分标准生成的最大树 (Maximum Tree)，在剪枝之后都能够保留最重要的属性划分，差别不大。反而是剪枝方法对于最优树的生成更为关键。</p>
<p>see others<br><a href="http://www.csuldw.com/2015/05/08/2015-05-08-decision%20tree/" target="_blank" rel="external">http://www.csuldw.com/2015/05/08/2015-05-08-decision%20tree/</a></p>

      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/decision-trees/" rel="tag"># decision trees</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2017/08/31/FP-growth/" rel="next" title="FP-Growth">
                <i class="fa fa-chevron-left"></i> FP-Growth
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2017/09/01/naive-Bayes/" rel="prev" title="Naïve Bayes">
                Naïve Bayes <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          


          
  <div class="comments" id="comments">
    
  </div>


        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap" >
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
          <img class="site-author-image" itemprop="image"
               src="/uploads/avatar.jpg"
               alt="东木金" />
          <p class="site-author-name" itemprop="name">东木金</p>
           
              <p class="site-description motion-element" itemprop="description">正在学习机器学习，希望能变得很强！</p>
          
        </div>
        <nav class="site-state motion-element">

          
            <div class="site-state-item site-state-posts">
              <a href="/archives/">
                <span class="site-state-item-count">162</span>
                <span class="site-state-item-name">日志</span>
              </a>
            </div>
          

          
            
            
            <div class="site-state-item site-state-categories">
              <a href="/categories/index.html">
                <span class="site-state-item-count">18</span>
                <span class="site-state-item-name">分类</span>
              </a>
            </div>
          

          
            
            
            <div class="site-state-item site-state-tags">
              <a href="/tags/index.html">
                <span class="site-state-item-count">42</span>
                <span class="site-state-item-name">标签</span>
              </a>
            </div>
          

        </nav>

        

        <div class="links-of-author motion-element">
          
            
              <span class="links-of-author-item">
                <a href="https://github.com/bdmk" target="_blank" title="GitHub">
                  
                    <i class="fa fa-fw fa-github"></i>
                  
                    
                      GitHub
                    
                </a>
              </span>
            
              <span class="links-of-author-item">
                <a href="mailto:catcherchan94@outlook.com" target="_blank" title="E-Mail">
                  
                    <i class="fa fa-fw fa-envelope"></i>
                  
                    
                      E-Mail
                    
                </a>
              </span>
            
          
        </div>

        
        

        
        

        


      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#Tree-Construction"><span class="nav-number">1.</span> <span class="nav-text">Tree Construction</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#How-To-Split-The-Dataset-–-Feature-Selection"><span class="nav-number">2.</span> <span class="nav-text">How To Split The Dataset – Feature Selection</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#Information-Theory"><span class="nav-number">2.1.</span> <span class="nav-text">Information Theory</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#特征选择算法"><span class="nav-number">2.2.</span> <span class="nav-text">特征选择算法</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#CART-生成算法"><span class="nav-number">3.</span> <span class="nav-text">CART 生成算法</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#CART-是怎样用于分类的"><span class="nav-number">3.1.</span> <span class="nav-text">CART 是怎样用于分类的</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#剪枝"><span class="nav-number">3.2.</span> <span class="nav-text">剪枝</span></a></li></ol></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright" >
  
  &copy;  2017 - 
  <span itemprop="copyrightYear">2018</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">东木金</span>
</div>


<div class="powered-by">
  由 <a class="theme-link" href="https://hexo.io">Hexo</a> 强力驱动
</div>

<div class="theme-info">
  主题 -
  <a class="theme-link" href="https://github.com/iissnan/hexo-theme-next">
    NexT.Gemini
  </a>
</div>


        

        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

  </div>

  

<script type="text/javascript">
  if (Object.prototype.toString.call(window.Promise) !== '[object Function]') {
    window.Promise = null;
  }
</script>









  












  
  <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>

  
  <script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>

  
  <script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>


  


  <script type="text/javascript" src="/js/src/utils.js?v=5.1.2"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=5.1.2"></script>



  
  


  <script type="text/javascript" src="/js/src/affix.js?v=5.1.2"></script>

  <script type="text/javascript" src="/js/src/schemes/pisces.js?v=5.1.2"></script>



  
  <script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.2"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=5.1.2"></script>



  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.2"></script>



  


  




	





  





  






  

  <script type="text/javascript">
    // Popup Window;
    var isfetched = false;
    var isXml = true;
    // Search DB path;
    var search_path = "search.xml";
    if (search_path.length === 0) {
      search_path = "search.xml";
    } else if (/json$/i.test(search_path)) {
      isXml = false;
    }
    var path = "/" + search_path;
    // monitor main search box;

    var onPopupClose = function (e) {
      $('.popup').hide();
      $('#local-search-input').val('');
      $('.search-result-list').remove();
      $('#no-result').remove();
      $(".local-search-pop-overlay").remove();
      $('body').css('overflow', '');
    }

    function proceedsearch() {
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay"></div>')
        .css('overflow', 'hidden');
      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();
      var $localSearchInput = $('#local-search-input');
      $localSearchInput.attr("autocapitalize", "none");
      $localSearchInput.attr("autocorrect", "off");
      $localSearchInput.focus();
    }

    // search function;
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            var keywords = searchText.split(/[\s\-]+/);
            if (keywords.length > 1) {
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                var title = data.title.trim();
                var titleInLowerCase = title.toLowerCase();
                var content = data.content.trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                var articleUrl = decodeURIComponent(data.url);
                var indexOfTitle = [];
                var indexOfContent = [];
                // only match articles with not empty titles
                if(title != '') {
                  keywords.forEach(function(keyword) {
                    function getIndexByWord(word, text, caseSensitive) {
                      var wordLen = word.length;
                      if (wordLen === 0) {
                        return [];
                      }
                      var startPosition = 0, position = [], index = [];
                      if (!caseSensitive) {
                        text = text.toLowerCase();
                        word = word.toLowerCase();
                      }
                      while ((position = text.indexOf(word, startPosition)) > -1) {
                        index.push({position: position, word: word});
                        startPosition = position + wordLen;
                      }
                      return index;
                    }

                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results

                if (isMatch) {
                  // sort index by position of keyword

                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices

                  function mergeIntoSlice(text, start, end, index) {
                    var item = index[index.length - 1];
                    var position = item.position;
                    var word = item.word;
                    var hits = [];
                    var searchTextCountInSlice = 0;
                    while (position + word.length <= end && index.length != 0) {
                      if (word === searchText) {
                        searchTextCountInSlice++;
                      }
                      hits.push({position: position, length: word.length});
                      var wordEnd = position + word.length;

                      // move to next position of hit

                      index.pop();
                      while (index.length != 0) {
                        item = index[index.length - 1];
                        position = item.position;
                        word = item.word;
                        if (wordEnd > position) {
                          index.pop();
                        } else {
                          break;
                        }
                      }
                    }
                    searchTextCount += searchTextCountInSlice;
                    return {
                      hits: hits,
                      start: start,
                      end: end,
                      searchTextCount: searchTextCountInSlice
                    };
                  }

                  var slicesOfTitle = [];
                  if (indexOfTitle.length != 0) {
                    slicesOfTitle.push(mergeIntoSlice(title, 0, title.length, indexOfTitle));
                  }

                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var item = indexOfContent[indexOfContent.length - 1];
                    var position = item.position;
                    var word = item.word;
                    // cut out 100 characters
                    var start = position - 20;
                    var end = position + 80;
                    if(start < 0){
                      start = 0;
                    }
                    if (end < position + word.length) {
                      end = position + word.length;
                    }
                    if(end > content.length){
                      end = content.length;
                    }
                    slicesOfContent.push(mergeIntoSlice(content, start, end, indexOfContent));
                  }

                  // sort slices in content by search text's count and hits' count

                  slicesOfContent.sort(function (sliceLeft, sliceRight) {
                    if (sliceLeft.searchTextCount !== sliceRight.searchTextCount) {
                      return sliceRight.searchTextCount - sliceLeft.searchTextCount;
                    } else if (sliceLeft.hits.length !== sliceRight.hits.length) {
                      return sliceRight.hits.length - sliceLeft.hits.length;
                    } else {
                      return sliceLeft.start - sliceRight.start;
                    }
                  });

                  // select top N slices in content

                  var upperBound = parseInt('1');
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content

                  function highlightKeyword(text, slice) {
                    var result = '';
                    var prevEnd = slice.start;
                    slice.hits.forEach(function (hit) {
                      result += text.substring(prevEnd, hit.position);
                      var end = hit.position + hit.length;
                      result += '<b class="search-keyword">' + text.substring(hit.position, end) + '</b>';
                      prevEnd = end;
                    });
                    result += text.substring(prevEnd, slice.end);
                    return result;
                  }

                  var resultItem = '';

                  if (slicesOfTitle.length != 0) {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + highlightKeyword(title, slicesOfTitle[0]) + "</a>";
                  } else {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + title + "</a>";
                  }

                  slicesOfContent.forEach(function (slice) {
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + highlightKeyword(content, slice) +
                      "...</p>" + "</a>";
                  });

                  resultItem += "</li>";
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              })
            };
            if (keywords.length === 1 && keywords[0] === "") {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x" /></div>'
            } else if (resultItems.length === 0) {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x" /></div>'
            } else {
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              })
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          }

          if ('auto' === 'manual') {
            input.addEventListener('input', inputEventFunction);
          } else {
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (event) {
              if (event.keyCode === 13) {
                inputEventFunction();
              }
            });
          }

          // remove loading animation
          $(".local-search-pop-overlay").remove();
          $('body').css('overflow', '');

          proceedsearch();
        }
      });
    }

    // handle and trigger popup window;
    $('.popup-trigger').click(function(e) {
      e.stopPropagation();
      if (isfetched === false) {
        searchFunc(path, 'local-search-input', 'local-search-result');
      } else {
        proceedsearch();
      };
    });

    $('.popup-btn-close').click(onPopupClose);
    $('.popup').click(function(e){
      e.stopPropagation();
    });
    $(document).on('keyup', function (event) {
      var shouldDismissSearchPopup = event.which === 27 &&
        $('.search-popup').is(':visible');
      if (shouldDismissSearchPopup) {
        onPopupClose();
      }
    });
  </script>





  

  

  

  
  
    <script type="text/x-mathjax-config">
      MathJax.Hub.Config({
        tex2jax: {
          inlineMath: [ ['$','$'], ["\\(","\\)"]  ],
          processEscapes: true,
          skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'code']
        }
      });
    </script>

    <script type="text/x-mathjax-config">
      MathJax.Hub.Queue(function() {
        var all = MathJax.Hub.getAllJax(), i;
        for (i=0; i < all.length; i += 1) {
          all[i].SourceElement().parentNode.className += ' has-jax';
        }
      });
    </script>
    <script type="text/javascript" src="//cdn.bootcss.com/mathjax/2.7.1/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
  


  

  

</body>
</html>
